Ways to fill a pandas DataFrame in Python:
1. append
2. update
3. set_value
This notebook compares the three methods using the timeit.timeit function.
In [1]:
import pandas as pd
import datetime
import timeit
In [7]:
# Use append
# pd.DatetimeIndex(start=..., end=..., freq=...) was removed in pandas 1.0;
# pd.date_range builds the same index. Start and end are the same day, so the
# index holds exactly one timestamp.
pd_index = pd.date_range(start=datetime.datetime(2016, 10, 1), end=datetime.datetime(2016, 10, 1), freq='1D')
# Empty frame with a single 'data' column: the starting point of the benchmark.
df = pd.DataFrame(columns=['data'])
# One-row frame that gets appended on every loop iteration.
df_1 = pd.DataFrame(data={'data': 1231}, index=pd_index)
def wrapper(func, *args, **kwargs):
    """Bind ``args``/``kwargs`` to ``func`` and return a zero-argument callable.

    ``timeit.timeit`` is given a callable that takes no arguments, so the
    arguments are captured here and ``func`` is only invoked when the
    returned callable is called.
    """
    return lambda: func(*args, **kwargs)
def app(df, df1):
    """Grow ``df`` by appending ``df1`` to it 100 times.

    Parameters:
        df: the starting frame; it is not modified, each step builds a new frame.
        df1: the frame appended on every iteration.

    Returns:
        The frame obtained after the 100 appends.
    """
    for _ in range(100):
        # DataFrame.append was removed in pandas 2.0; pd.concat is its
        # replacement. The df1 argument is used here (the previous body read
        # the notebook-level df_1 and silently ignored the parameter).
        df = pd.concat([df, df1])
    return df
# Capture the arguments up front so timeit receives a zero-argument callable.
wrapped = wrapper(app, df, df_1)
# Total wall time, in seconds, of 100 executions of the 100-append loop.
timeit.timeit(stmt=wrapped, number=100)
Out[7]:
In [8]:
# Use set value
# pd.DatetimeIndex(start=..., end=..., freq=...) was removed in pandas 1.0;
# pd.date_range builds the same index. 'min' is the supported spelling of the
# deprecated '1T' one-minute alias.
pd_index = pd.date_range(start=datetime.datetime(2016, 10, 1), end=datetime.datetime(2016, 10, 2), freq='min')
# Frame pre-allocated over the whole minute index; the 'data' column starts empty.
df = pd.DataFrame(columns=['data'], index=pd_index)
def wrapper(func, *args, **kwargs):
    """Return a no-argument callable that runs ``func(*args, **kwargs)``.

    Re-declared in this cell so the cell can run on its own; it behaves the
    same as the helper in the append benchmark cell.
    """
    return lambda: func(*args, **kwargs)
def app(df):
    """Write 1231 into the 'data' column of the first 100 rows of ``df``, in place.

    Parameters:
        df: frame with a 'data' column and at least 100 rows; it is mutated.

    Raises:
        IndexError: if ``df`` has fewer than 100 rows.
    """
    for i in range(100):
        # DataFrame.set_value was removed in pandas 1.0; .at is the scalar
        # setter that replaces it. The row label comes from the frame's own
        # index so the function no longer depends on a notebook-level pd_index.
        df.at[df.index[i], 'data'] = 1231
# Capture the frame up front so timeit receives a zero-argument callable.
wrapped = wrapper(app, df)
# Total wall time, in seconds, of 100 executions of the 100-row fill loop.
timeit.timeit(stmt=wrapped, number=100)
Out[8]:
In [9]:
# Use update value
# pd.DatetimeIndex(start=..., end=..., freq=...) was removed in pandas 1.0;
# pd.date_range builds the same index. 'min' is the supported spelling of the
# deprecated '1T' one-minute alias.
pd_index = pd.date_range(start=datetime.datetime(2016, 10, 1), end=datetime.datetime(2016, 10, 2), freq='min')
# Frame pre-allocated over the whole minute index; the 'data' column starts empty.
df = pd.DataFrame(columns=['data'], index=pd_index)
def wrapper(func, *args, **kwargs):
    """Return a no-argument callable that runs ``func(*args, **kwargs)``.

    Re-declared in this cell so the cell can run on its own; it behaves the
    same as the helper in the append benchmark cell.
    """
    return lambda: func(*args, **kwargs)
def app(df):
    """Fill the 'data' column of the first 100 rows of ``df`` via ``DataFrame.update``.

    Each iteration builds a one-row frame holding 1231 and merges it into
    ``df`` in place, so the work per iteration matches the other fill
    benchmarks (one row written per step).

    Parameters:
        df: frame with a 'data' column; it is mutated in place.
    """
    for i in range(100):
        # Slice exactly row i. The previous slice [:i] produced an empty frame
        # on the first pass, an ever-growing frame afterwards, and never
        # reached the 100th row. The labels come from df itself instead of a
        # notebook-level pd_index.
        df_1 = pd.DataFrame(data={'data': 1231}, index=df.index[i:i + 1])
        df.update(df_1)
# Capture the frame up front so timeit receives a zero-argument callable.
wrapped = wrapper(app, df)
# Total wall time, in seconds, of 100 executions of the 100-step update loop.
timeit.timeit(stmt=wrapped, number=100)
Out[9]:
In [ ]: